library(survey)

# Import Data -------------------------------------------------------------
#setwd("")
source("01_cleaning_raceIAT2020.R")

# Create Targets ----------------------------------------------------------
## Picking a week to establish as baseline 
WEEKS <- unique(impdat20_bw$week)
set.seed(1693)
weight_week <- sample(WEEKS[1:16], 1) # any week between january and may
weight_week
# note: seed changes across R versions so week depends on computer run
weight_week_dat <- subset(impdat20_bw, week == weight_week  & 
                            !is.na(sex)  & !is.na(race5) & !is.na(edu_cat4) & !is.na(age_cat4) & 
                            !is.na(census_region) & !is.na(broughtwebsite2) & 
                            !is.na(ideo3))

base_week <- svydesign(~1, data = weight_week_dat)

# FUNCTION FOR CREATING TARGETS FROM AUXILIARY INFORMATION AND FORMULA
# From Caughey et al 2020
create_targets <- function (target_design, target_formula) {
  target_mf <- model.frame(target_formula, model.frame(target_design))
  target_mm <- model.matrix(target_formula, target_mf)
  wts <- weights(target_design)
  colSums(target_mm * wts) / sum(wts) # returns vector of targets
}

# Note Targets
target_formula <- ~ (race3 + ideo3_lab + sex)^2 + 
  (race3 + ideo3_lab + edu_cat4)^3 + 
  (race3 + ideo3_lab + age_cat4)^3 + 
  (race3 + ideo3_lab + census_region)^2 + 
  (race3 + ideo3_lab + broughtwebsite2)^2
pop_targets <- create_targets(base_week, target_formula)
pop_targets


# Creating Weights by Week ------------------------------------------------
impdat20.weights <- array(NA, c(1, 2))
colnames(impdat20.weights) <- c("session_id", "WeekWeights")

WEEKS <- sort(unique(impdat20_bw$week))

for(i in 1:length(WEEKS)){
  print(i/length(WEEKS))
  dat <- impdat20_bw[which(impdat20_bw$week == WEEKS[i]),]
  dat <- subset(dat, !is.na(sex) & !is.na(race5) & !is.na(edu_cat4) & !is.na(age_cat4) & !is.na(census_region)
                & !is.na(broughtwebsite2) & !is.na(ideo3))
  
  print(nrow(dat))
  
  impdat20.unw <- svydesign(ids = ~1, data = dat)
  x <- array(NA, c(nrow(dat), 2))
  x[,1] <- dat$session_id
  
  d_weighted <- calibrate(design = impdat20.unw,
                          formula = target_formula,
                          population = pop_targets,
                          calfun = "raking",
                          maxit = 2000, 
                          epsilon = 0.00191) 
  
  ## Verify targets (uncomment for weeks if wanted)
  ### means
  # unadjusted
  # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, impdat20.unw)
  # # target
  # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, base_week)
  # # rake
  # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, d_weighted)
  # unadjusted
  # svyby(~edu_cat4, ~ideo3_lab, impdat20.unw, svymean)
  # # target
  # svyby(~edu_cat4, ~ideo3_lab, base_week, svymean)
  # # rake
  # svyby(~edu_cat4, ~ideo3_lab, d_weighted, svymean)

  x[,2] <- weights(d_weighted)
  
  impdat20.weights <- rbind(impdat20.weights,x)
}

impdat20.WeekWeight <- merge(impdat20_bw, impdat20.weights, by = "session_id", all.x = T)
rm(impdat20.weights, impdat20.unw)

# summary(impdat20.weighted$WeekWeights)
# hist(impdat20.weighted$WeekWeights)
# sd(impdat20.weighted $WeekWeights, na.rm = T)

save(impdat20.WeekWeight, file = "./data/impdat20.Race.WeekWeight.rdata")
